rm(list=ls())
setwd("/scratch/cds2083/")

library(foreign)
library(BayesTree)
library(KRLS)
library(arm)
library(e1071)
library(SuperLearner)
library(MatchIt)

# Functions

SL.KRLS <- function(Y, X, newX, family,...){
	if(family$family=="gaussian"){
		fit.krls <- krls(X=X, y=Y)
		pred <- predict.krls(fit.krls, newdata=as.matrix(newX))$fit
		fit <- list(object=fit.krls)
	}
	if(family$family=="binomial"){
		fit.krls <- krls(X=X, y=Y)
		pred <- predict.krls(fit.krls, newdata=as.matrix(newX))$fit
		fit <- list(object=fit.krls)
	}
	out <- list(pred=pred, fit=fit)
	class(out$fit) <- c("SL.KRLS")
	return(out)
}

predict.SL.KRLS <- function(object, newdata, ...){
	pred <- predict(object, 
				newdata=newdata)$fit
	return(pred)
}

 SL.bart <- function (Y, X, newX, family, ntree = 300, sigdf = 3, 
 				sigquant = 0.9, k = 2, power = 2, base = 0.95, 
 				binaryOffset = 0, ndpost = 1000, nskip = 100, ...) 
{
    if (family$family == "gaussian") {
        fitBart <- bart(x.train = X, y.train = Y, x.test = newX, 
            ntree = ntree, sigdf = sigdf, sigquant = sigquant, 
            k = k, power = power, base = base, binaryOffset = binaryOffset, 
            ndpost = ndpost, nskip = nskip, verbose = FALSE)
        pred <- fitBart$yhat.test.mean
    }
    if (family$family == "binomial") {
        fitBart <- bart(x.train = X, y.train = as.factor(Y), 
            x.test = newX, ntree = ntree, sigdf = sigdf, sigquant = sigquant, 
            k = k, power = power, base = base, binaryOffset = binaryOffset, 
            ndpost = ndpost, nskip = nskip, verbose = FALSE)
        pred <- pnorm(apply(fitBart$yhat.test, 2, mean))
    }
    fit <- list(object = fitBart)
    out <- list(pred = pred, fit = fit)
    class(out$fit) <- c("SL.bart")
    return(out)
}



# Simulation
n.sim <- 250

resMat <- data.frame(	truth=rep(NA,n.sim),
						OLS=rep(NA,n.sim),
						matching=rep(NA,n.sim),
						naiveIPW=rep(NA,n.sim),
						learnerIPW=rep(NA,n.sim))
set.seed(678)
for(i in 1:n.sim){

n <- 500

# Key covariate

x.sc <- rnorm(n)
x <- x.sc[order(x.sc)]

# Potential outcomes

E.y0 <- x+ .5*(x-min(x))^2
E.y1 <- x+.75*(x-min(x))^2 + .75*(x-min(x))^3
y0 <- E.y0 + rnorm(n,0,sd=5)
y1 <- E.y1 + rnorm(n,0,sd=10)

# Confounded treatment assignment

pZ <- (1+exp(-(-.5+.75*x-.5*(x-mean(x))^2)))^(-1)
Z <- rbinom(n,1,pZ)
Y <- Z*y1 + (1-Z)*y0

graphOn <- FALSE
if(graphOn == TRUE){
pdf(file="sim-graphs.pdf", height=9, width=6)
par(mfrow=c(3,1))
par(mar=c(5,4,5,2))
par(pty="m")
plot(x, pZ, 
		xlab=bquote(W[1]),
		ylab="Propensity score")
title(main=bquote(Propensity~score~over~W[1]))
plot(x, y0, 
		ylim=c(min(c(y0,y1))-5, max(c(y0,y1))+5), 
        xlim=c(min(x),max(x)),
        xlab=bquote(W[1]),
        ylab="Y(1) (filled) and Y(0) (hollow)",
        bg="gray",
        pch=21)
points(x,y1, pch=19, col="black")
title(main=bquote(Potential~outcomes~over~W[1]))
plot(x[Z==0], Y[Z==0], 
		ylim=c(min(c(y0,y1))-5, max(c(y0,y1))+5), 
        xlim=c(min(x),max(x)),
        xlab=bquote(W[1]),
        ylab="Treated (filled) and control (hollow) outcomes",
        bg="gray",
        pch=21)
title(main="Observed data\n (after treatment assignment)",
		font.main=1)
points(x[Z==1], Y[Z==1], pch=19)
dev.off()
}

# Data

dataUp <- data.frame(Y,Z,x)


# Matching
matchOut <- matchit(Z~x,
					data=dataUp,
					method="nearest",
					distance="mahalanobis",
					replace=T, 
					discard="treat")
m.data <- match.data(matchOut)


# Naive IPW
dataUp$pFit <- predict(glm(Z~x, family="binomial"),
				type="response",
				data=dataUp)
dataUp$IPW <- dataUp$Z + (1-dataUp$Z)*(dataUp$pFit/(1-dataUp$pFit))

# IPW via learner

Xmat <- as.data.frame(dataUp[,c("x")])
Zvar <- dataUp[,"Z"]

	nu.admis <- min(min(c(mean(Zvar), 1-mean(Zvar))), .5)
	SL.svm.admis <- function(...,nu=nu.admis){
		SL.svm(...,nu=nu)
	}

	fitLearner <- 	SuperLearner(	Y=Zvar,
									X=Xmat,
						SL.library = c(	"SL.glm", 
										"SL.KRLS", 
										"SL.bart",
										"SL.bayesglm",
										"SL.svm.admis"
										),
						family=binomial(),
						method="method.NNLS",
						verbose=TRUE,
						cvControl=list(V=10))

dataUp$pLearner <- fitLearner$SL.predict
dataUp$learnerIPW <- dataUp$Z + (1-dataUp$Z)*(dataUp$pLearner/(1-dataUp$pLearner))

# Winsorizing for negative weights
if(sum(dataUp$learnerIPW<0)>0){
dataUp$learnerIPW[dataUp$learnerIPW<0] <- min(dataUp$learnerIPW[dataUp$learnerIPW>0])
}

print(sum(is.na(dataUp$IPW)))
print(sum(is.na(dataUp$learnerIPW)))
if(sum(is.na(dataUp$IPW))==0&sum(is.na(dataUp$learnerIPW))==0){
# Results
bTrue 	<- mean((y1-y0)[Z==1])
bOLS 	<- coef(lm(Y~Z+x,
				data=dataUp))["Z"]
bMatch 	<- coef(lm(	Y~Z, 
					data=m.data, 
					weights=m.data$weights))["Z"]
bIPW 	<- coef(lm(Y~Z,
				weights=dataUp$IPW,
				data=dataUp))["Z"]
bLearnerIPW <- coef(lm(Y~Z,
				weights= dataUp$learnerIPW,
				data=dataUp))["Z"]

#i <- 1				
resMat[i,] <- c(bTrue, bOLS, bMatch, bIPW, bLearnerIPW)
write.csv(resMat,
			file="sim1.csv",
			row.names=F)
}

cat(paste(i," ", sep="")); flush.console()

}



